---
title: "PS replication"
author: "Olivier Bergeron-Boutin"
date: "`r Sys.Date()`"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Data and packages

```{r}
library(tidyverse)
library(ggplot2)
library(ggtext)
library(countrycode)
library(ggpubr)

source("scripts/functions.R")

# Data from waves 1-19 of Bright Line Watch expert surveys
# This dataset has been pared down to only include relevant variables
expert_combined = read_csv("data/expert_combined_waves.csv")

# Data from all Bright Line Watch public surveys
# Note that some waves of BLW surveys only included the expert sample
public_combined = read_csv("data/public_combined.csv")
```

# Figure 1: ratings by participation

```{r}
# for each unique LuthID, compute in how many waves it appears
waves_by_resp = 
  expert_combined %>% 
  filter(wave != 19) %>% 
  count(LuthID) %>% 
  rename(waves_participate = n)

# frequencies of the above
table(waves_by_resp$waves_participate)


fig1_data = expert_combined %>% 
  # exclude wave 19 (June/July 2023)
  # when launching wave 19, the LuthID of the first ~120 respondents were not
  # properly captured 
  filter(wave != 19) %>% 
  select(LuthID, wave, rusa_now) %>% 
  # merge with data on number of waves that each LuthID appears in
  left_join(
    waves_by_resp, by = "LuthID"
  ) %>% 
  # we'll look only at waves 2 through 5
  filter(wave %in% 2:5) %>% 
  # compute mean rating by wave and number of waves of waves participated in 
  group_by(wave, waves_participate) %>% 
  summarise(mean_rating = mean(rusa_now, na.rm = T),
            n = n()) %>% 
  # join mean rating by wave (all respondents) to make horizontal lines
  left_join(
    expert_combined %>% 
      filter(wave != 19) %>% 
      group_by(wave) %>% 
      summarise(mean_rating_all = mean(rusa_now, na.rm = T)),
    by = "wave"
  ) %>% 
  # just  a few people participated in 15+ waves; exclude those for 
  # simpler graph
  filter(waves_participate <= 14) %>% 
  # to have nicer labels for graph
  mutate(wave = paste0("Wave ", wave) %>% 
           factor(levels = paste0("Wave ", 1:18)))

fig1_data %>% 
  ggplot(aes(x = waves_participate, y = mean_rating, size = n)) +
  geom_point(position = position_dodge(0.5)) +
  geom_hline(aes(yintercept = mean_rating_all), linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 14, 1), minor_breaks = NULL) +
  scale_y_continuous(limits = c(0, 100), 
                     breaks = seq(0, 100, 10)) +
  labs(y = "Mean rating of American democracy", 
       x = "Number of BLW waves the respondent participated in",
       caption = "Horizontal lines show unconditional means by wave.\nPoint size represents the number of respondents.",
       title = "Ratings of American democracy in waves 2-5, by expert participation") +
  facet_wrap(~wave) +
  theme_custom(legend.position = "none")

ggsave(file = "figures/png/fig_1.png", height = 7, width = 9)

ggsave("figures/tiff/fig_1.tiff", height = 7, width = 9)
```

# Figure 2: Twitter usage 

```{r}
# the question on Twitter usage was asked in BLW wave 17
expert_twitter = expert_combined %>% 
  filter(wave == 17) %>% 
  # based on answers to two questions:
  # a. how often respondent uses Twitter
  # b. how often respondent tweets about American democracy
  # we'll make three categories
  mutate(
    twitter_3cat = case_when(
      twitter_democracy_usage %in% c(1, 2, 3) ~ "Democracy tweeters", 
      twitter_democracy_usage %in% c(4, 5, 6, 7) ~ "Twitter consumers", 
      twitter_usage == 7 ~ "Non-users"
    )
  ) %>% 
  # now pivot to long format for three different types of ratings:
  # rusa_now -- rating of US democracy now
  # rusa_5y -- rating of US democracy in 2027
  # rusa_10y -- rating of US democracy in 2032
  dplyr::select(twitter_3cat, rusa_now, rusa_5y, rusa_10y) %>% 
  filter(!is.na(twitter_3cat)) %>% 
  pivot_longer(cols = -twitter_3cat,
               names_to = "when", 
               values_to = "rating")

# compute mean ratings by twitter category
expert_twitter %>%   
  group_by(twitter_3cat, when) %>% 
  summarise(
    mean_rating = mean(rating, na.rm = T),
    n = n(),
    ci_l = mean_rating - qt(0.975, n) * (sqrt(var(rating, na.rm = T) / n)),
    ci_u = mean_rating + qt(0.975, n) * (sqrt(var(rating, na.rm = T) / n))
  ) %>% 
  # reorder factors for figure
  mutate(
    when = factor(when, levels = c("rusa_now", "rusa_5y", "rusa_10y")),
    twitter_3cat = factor(twitter_3cat, levels = c("Democracy tweeters", 
                                                   "Twitter consumers", 
                                                   "Non-users"))
  ) %>% 
  ggplot(aes(
    x = when,
    y = mean_rating,
    ymin = ci_l, 
    ymax = ci_u,
    color = twitter_3cat,
    shape = twitter_3cat,
    group = twitter_3cat
  )) +
  geom_point(size = 2.25, position = position_dodge(0.1)) +
  geom_errorbar(width = 0,
                size = 1, alpha = 0.6, position = position_dodge(0.1)) +
  geom_line(lwd = .55, linetype = "dotted", position = position_dodge(0.1)) +
  scale_y_continuous(breaks = seq(0, 100, by = 20)) +
  scale_shape_manual(name = NULL, values = c(16, 17, 18)) +
  scale_x_discrete(labels = c("2022", "2027", "2032")) +
  scale_color_viridis_d(name = NULL, end = 0.7) +
  labs(
    y = "",
    x = "",
    title = "Expert ratings of U.S. democracy by Twitter usage",
    caption = "Ratings of U.S. democracy by experts on a 0-100 scale.\n Figure shows mean values by Twitter usage. \n Vertical error bars are 95% confidence intervals.\nSource: @BrightLineWatch - October 2022"
  ) +
  # to allow geom_segment() to operate beyond the limits of the y axis
  coord_cartesian(ylim = c(0, 100), clip = "off") +
  theme_custom(legend.position = c(.15, .2)) %+replace%
  theme(
    legend.key.size = unit(.8, "lines"),
    legend.background = element_rect(
      colour = '#636363',
      fill = 'white',
      linetype = 'solid'
    )
  )

ggsave("figures/png/fig_2.png", height = 5, width = 7)
ggsave("figures/tiff/fig_2.tiff", height = 5, width = 7)
```

# Figure 3: ratings by V-Dem participation

```{r}
# the question on V-Dem participation was asked in BLW wave 19
# here we transform wave 19 to long format, where each row is one respondent
# rating one country
# three questions on v-dem: 
# a. was respondent ever invited to code for v-dem
# b. [if yes]: did respondent accept to code
# c. [if no]: would respondent accept to code if invited
ratings_by_vdem_long = 
  expert_combined %>% 
  filter(!is.na(vdem_invite) & wave == 19) %>% 
  mutate(
    # first grouping, for left facet: whether expert has ever been invited 
    group_1 = case_when(
      vdem_invite == 1 ~ "Invited", 
      vdem_invite == 2 ~ "Not invited"
    ),
    # 2nd grouping, for right facet: 
    # respondents who participated or would participate
    # then respondents who declined to participate or would not participate
    group_2 = case_when(
      (vdem_invite == 1 & vdem_participate == 1) | 
        (vdem_invite == 2 & vdem_would == 1) ~ "Did or would participate",
      (vdem_invite == 1 & vdem_participate == 2) | 
        (vdem_invite == 2 & vdem_would == 2) ~ "Did not or would not participate",
    )
    ) %>% 
  # only relevant columns
  select(group_1, group_2, matches("rating_[a-z]+$")) %>% 
  # long format: at the respondent-country level
  pivot_longer(cols = contains("rating"), 
               names_to = "country", 
               values_to = "rating") %>% 
  mutate(
    # current format is e.g. "rating_TUR" for Turkey
    # extract three-letter country abbrevation; convert to full country name
    # note: exception for Brazil (coded as BRZ by ourselves -- dating back
    # to prior surveys -- but the correct ISO code is BRA)
    country =
      ifelse(
        str_extract(country, "[A-Z]+") == "BRZ", 
        "Brazil", 
        countrycode::countrycode(str_extract(country, "[A-Z]+"), 
                                 origin = "iso3c", destination = "country.name")
      )
  ) %>% 
  # respondents only rated a random subset of all countries; get rid of NAs
  filter(!is.na(rating))

ratings_2facets = 
  # compute mean ratings by country and group, separately for the two different
  # classification systems outlined above (and bind together the results)
  bind_rows(
    mean_and_ci(
      ratings_by_vdem_long, outcome = "rating", by = "group_1+country"
    ) %>% 
      rename(by = group_1) %>% mutate(facet = "left"),
    mean_and_ci(
      ratings_by_vdem_long, outcome = "rating", by = "group_2+country"
    ) %>% 
      rename(by = group_2) %>% mutate(facet = "right")
  ) %>% 
  filter(!is.na(by))

# making figure for left facet: ratings of countries by v-dem invite
ratings_2facets_left_gg = ratings_2facets %>% 
  mutate(
    # reorder country by mean rating among non-invited
    country = factor(country, levels = ratings_2facets %>% 
                       filter(by == "Not invited") %>% 
                       arrange(mean) %>% pull(country))
  ) %>% 
  filter(by %in% c("Invited", "Not invited")) %>% 
  ggplot(aes(y = country, x = mean, xmin = ci_l, xmax = ci_u, col = by)) +
  geom_point(position = position_dodge(0.5)) +
  geom_errorbar(width = 0, size = 1, position = position_dodge(0.5), alpha = 0.6) +
  scale_color_manual(values = c("Not invited" =  "#003f5c", 
                                "Invited" = "#ffa600"),
                     breaks = c("Invited", "Not invited"),
                     labels = c("Invited", "Not invited"),
                     name = "") +
  labs(x = "Mean rating", 
       y = "",
       title = "Invitations") +
  scale_x_continuous(limits = c(0, 100)) +
  guides(col=guide_legend(ncol=1, keywidth = 1)) +
  theme_custom(plot.title.size = 1.2)

# right facet of the figure: mean ratings by the second grouping 
ratings_2facets_right_gg = ratings_2facets %>% 
  mutate(
    country = factor(country, levels = ratings_2facets %>% 
                       filter(by == "Not invited") %>% 
                       arrange(mean) %>% pull(country))
  ) %>% 
  filter(
    by %in% c("Did or would participate", "Did not or would not participate")
  ) %>% 
  ggplot(aes(y = country, x = mean, xmin = ci_l, xmax = ci_u, col = by)) +
  geom_point(position = position_dodge(0.5)) +
  geom_errorbar(width = 0, size = 1, position = position_dodge(0.5), alpha = 0.6) +
  scale_color_manual(values = c("Did or would participate" = "#ffa600", 
                                "Did not or would not participate" =  "#003f5c"),
                     breaks = c("Did or would participate", 
                                "Did not or would not participate"),
                     name = "") +
  labs(x = "", 
       y = "", 
       title = "Participation") +
  scale_x_continuous(limits = c(0, 100)) +
  # to have blank y axis text
  scale_y_discrete(labels = rep("", 13)) +
  guides(col=guide_legend(ncol=1, keywidth = 1)) +
  theme_custom(plot.title.size = 1.2)

# put the two figures together using ggarrange
ratings_arrange = ggpubr::ggarrange(
  ratings_2facets_left_gg,
  ratings_2facets_right_gg, 
  # more horizontal space for the left part (since it contains the y axis labels)
  widths = c(1.5, 1),
  common.legend = FALSE
)

# annotate figure with title and caption
ggpubr::annotate_figure(
  ratings_arrange, 
  top = ggpubr::text_grob("Invitation bias and participation bias among V-Dem coders",
                          size = 18, face = "bold"),
  bottom = ggpubr::text_grob(
    label = "Source: @BrightLineWatch - July 2023", hjust = 1, x = 1, size = 14, 
    color = "darkgrey", face = "italic", 
    # by default, space between lines is too big, so make it smaller
    lineheight = 0.85
  )
)

ggsave("figures/png/fig_3.png", height = 7, width = 9)
ggsave("figures/tiff/fig_3.tiff", height = 7, width = 9)
```

# Figure 4: ratings of other countries

```{r}
# This figure compares ratings of other countries in wave 17 (the most recent
# BLW wave for which both experts and public rated democracy in countries
# other than the U.S.)

# compute mean ratings by country for the expert sample
expert_rating_ts = expert_combined %>% 
  filter(wave == 17) %>% 
  # to long format: respondent-country level
  select(contains("rating"), rating_USA = rusa_now) %>% 
  pivot_longer(everything(), 
               names_to = "country", 
               values_to = "rating") %>% 
  group_by(country) %>% 
  mean_and_ci(outcome = "rating", by = "country") %>% 
  filter(!is.na(mean))

# mean ratings by country for the public sample
public_rating_ts = public_combined %>% 
  filter(wave == 17) %>% 
  select(weight, contains("rating")) %>%
  # long format
  pivot_longer(contains("rating"), 
               names_to = "country", 
               values_to = "rating") %>% 
  group_by(country) %>% 
  mean_and_ci(outcome = "rating", by = "country", survey_weight = "weight") %>% 
  filter(!is.na(mean))

# bind public and expert means together; recode country to full name
ratings_ts = bind_rows(
  expert_rating_ts %>% mutate(who = "Experts"), 
  public_rating_ts %>% mutate(who = "Public")
) %>% 
  mutate(
    country = str_extract(country, "[A-Z]{3}") %>% 
      recode("VEN" =  "Venezuela", "TUR" = "Turkey", 
                     "SAU" = "Saudi Arabia", "RUS" = "Russia", 
                     "PRK" = "North Korea",  "PHL" = "Philippines", 
                     "MEX" = "Mexico", "ISR" = "Israel", "IRQ" = "Iraq",
                     "GBR" = "Great Britain", "CHN" = "China", "CAN" = "Canada", 
                     "BRZ" = "Brazil", "USA" = "United States")
  )

# order country by expert ratings in w17
ratings_ts$country = factor(
  ratings_ts$country, 
  levels = ratings_ts %>% 
    filter(who == "Experts") %>% 
    arrange(mean) %>% 
    pull(country)
)

# figure
ratings_ts %>% 
  ggplot(aes(x = mean, xmin = ci_l, xmax = ci_u, 
           y = country, col = fct_rev(who), shape = fct_rev(who),
           group = fct_rev(who))) +
  geom_point(size = 2.25, position = position_dodge(0.75), stroke = 1) +
  geom_errorbar(width = 0, size = 1, alpha = 0.6,
                position = position_dodge(0.75)) +
  labs(y = "", 
       x = "Mean rating",
       title = "Expert and public ratings of democracy in 13 countries",
    caption = "Figure shows mean rating by experts and the public. \nData from Wave 17 (Oct. 2022)\nSource: @BrightLineWatch - October 2022") +
  scale_color_manual(values = c("Experts" = "#1A712F",
                                "Public" = "#632E7A"), 
                     name = "") +
  scale_shape_manual(values = c(16, 1), name = "") +
  scale_x_continuous(limits = c(0, 100),
                     breaks = seq(0, 100, 20)) +
  theme_custom(axis.title.x.margin = 2)
  
ggsave("figures/png/fig_4.png", height = 6, width = 9)
ggsave("figures/tiff/fig_4.tiff", height = 6, width = 9)
```

# Figure 5: ratings of U.S. democracy

```{r}
# This figure shows mean ratings of U.S. democracy for experts and public, 
# across all waves of BLW surveys

# time-series of experts
expert_rating_ts <- expert_combined %>%
  mean_and_ci(outcome = "rusa_now", by = "wave") %>% 
  mutate(who = "Experts")

# time-series for public
public_rating_ts = public_combined %>% 
  mean_and_ci(outcome = "rating_USA", by = "wave", survey_weight = "weight") %>% 
  mutate(who = "Public")

# bind the above together
rating_ts_combined = bind_rows(
  # the "date_me_fxn" function takes a value for BLW survey wave (e.g. 1) and 
  # produces a date showing when the survey was fielded
  date_me_fxn(expert_rating_ts),
  date_me_fxn(public_rating_ts)
)


rating_ts_combined = rating_ts_combined %>%
  mutate(who = factor(who, levels = c(
                                      "Experts",
                                      "Public")))

rating_ts_combined %>% 
  # wave 1 had a 1-10 rating, so we exclude it for that reason
  filter(!is.na(who) & wave != 1) %>% 
  ggplot(aes(
    x = date,
    y = mean,
    color = who,
    shape = who
  )) +
  geom_point(size = 1) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0,
                size = .2) +
  geom_line(lwd = .35) +
  scale_y_continuous(breaks = seq(0, 100, by = 20)) +
  scale_x_date(
    limits = as.Date(c('1/1/2017', '1/1/2024'), format = "%d/%m/%Y"),
    breaks = as.Date(c('1/1/2017', '1/1/2018', '1/1/2019', '1/1/2020', '1/1/2021', '1/1/2022', '1/1/2023', '1/1/2024'), format = "%d/%m/%Y"),
    labels = c(2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024)
  ) +
  scale_shape_manual(name = NULL, 
                     values = c("Public" = 22, "Experts" = 21)) +
  #scale_fill_manual(values = cols_legend) +
  scale_color_manual(name = NULL, 
                     values = c("Public" = "#632E7A","Experts" = "#1A712F")) +
  labs(
    y = "",
    x = "",
    title = "Expert and public ratings of U.S. democracy: 2017-2023",
    caption = "Ratings of U.S. democracy by the public and experts on a 0-100 scale.\n Figure shows mean values across nineteen survey waves. \n Vertical error bars are 95% confidence intervals.\nSource: @BrightLineWatch - July 2023"
  ) +
  # to allow geom_segment() to operate beyond the limits of the y axis
  coord_cartesian(ylim = c(0, 100), clip = "off") +
  theme_custom(axis.title.x.margin = -10, 
               legend.position = c(.15, .2)) %+replace%
  theme(legend.background = element_rect(
      colour = '#636363',
      fill = 'white',
      linetype = 'solid'
    )
    )

ggsave("figures/png/fig_5.png", height = 5, width = 7)
ggsave("figures/tiff/fig_5.tiff", height = 5, width = 7)
```

# Figure 6: 30 performance items

```{r}
# This figure shows, for the latest BLW survey wave (wave 19), expert and public
# assessments of the 30 performance items 

# Public PERF ratings long perforamance dataset at the respondent-indicator level
public_perf_long = public_combined %>%
  # 3 category PID, including leaners as partisans
  mutate(
    partisan = case_when(
      pid7 %in% c("Strong Democrat",
                  "Lean Democrat",
                  "Not very strong Democrat") ~ "Democrat", 
      pid7 %in% c("Strong Republican",
                  "Lean Republican",
                  "Not very strong Republican") ~ "Republican",
      pid7 %in% "Independent" ~ "Independent"
    )
  ) %>% 
  dplyr::select(wave, partisan, weight, matches("^perf*")) %>% 
  # longer formqt: respondent-indicator level
  pivot_longer(cols = starts_with("perf"),
               names_to = "item",
               values_to = "performance") %>% 
  filter(!is.na(performance)) %>%
  # make 4-point variable into a dummy
  # 4-point scale is: US fully meets/mostly meets/partly meets/does not meet at 
  # all
  mutate(
    perf_positive = ifelse(
      performance %in% c("The U.S. fully meets this standard", 
                         "The U.S. mostly meets this standard"),
      1, 0)
  )

# proportion positive by wave and item
public_perf_agg = 
  mean_and_ci(
    public_perf_long, 
    outcome = "perf_positive", 
    by = "wave+item", 
    survey_weight = "weight"
  ) %>% 
  mutate(
    # "rename_perf" is a function that takes the variable names of the 30 
    # performance items and returns a label for each
    item = rename_perf(item),
    who = "Public"
  ) %>% 
  arrange(desc(wave), item)

#-------------------------------------------------------------------------------
# Expert PERF ratings
# long performance dataset: at the respondent-indicator level
expert_perf_long = expert_combined %>%   
  # PERF values to character
  mutate(across(contains("perf_"), 
                ~recode(.x, `1` = "The U.S. does not meet this standard", 
                        `2` = "The U.S. partly meets this standard", 
                        `3` = "The U.S. mostly meets this standard", 
                        `4` = "The U.S. fully meets this standard", 
                        `5` = "Not sure"))) %>% 
  dplyr::select(wave, matches("^perf*")) %>% 
  pivot_longer(cols = starts_with("perf"),
               names_to = "item",
               values_to = "performance") %>% 
  filter(!is.na(performance)) %>%
  # dummy: mostly meets or fully meets
  mutate(
    perf_positive = ifelse(
      performance %in% c("The U.S. fully meets this standard", 
                         "The U.S. mostly meets this standard"),
      1, 0
    )
  )

expert_perf_agg = expert_perf_long %>% 
  mean_and_ci(outcome = "perf_positive", by = "wave+item") %>% 
  mutate(item = rename_perf(item),
         who = "Experts") %>% 
  arrange(desc(wave), item)

# combine experts and public
combined_perf_agg = bind_rows(
  public_perf_agg, expert_perf_agg
)

# reorder factor according to % of experts in w19 who rate mostly or fully meets
combined_perf_agg$item = factor(
  combined_perf_agg$item, 
  levels = combined_perf_agg %>% 
    filter(who == "Experts" & wave == 19) %>% 
    arrange(mean) %>% 
    pull(item)
)

# Plotting w19 experts v. w19 public
combined_perf_agg %>% 
  filter(wave == 19) %>% 
  ggplot(aes(y = item,  x = mean*100, xmin = ci_l*100, 
             xmax = ci_u*100, color = who, shape = who)) +
  geom_point(size = 2) +
  geom_errorbar(size = 0.5, width = 0, alpha = 0.6) +
  labs(x = "% who answered statement U.S. mostly or fully meets this standard",
      y = "",
      title = "Expert and public ratings of U.S. democratic performance",
      caption = "Statements are in descending order of performance ratings for experts surveyed in June/July 2023.\n Horizontal error bars are 95% confidence intervals.\nSource: @BrightLineWatch - July 2023") +
  scale_shape_manual(values = c(16,15), name = "") +
  scale_color_manual(values = c("#1B813E", "#632E7A"), name = "") +
  expand_limits(y = c(0, 30), x = c(0, 103)) +
  theme_custom(base_size = 11)

ggsave("figures/png/fig_6.png", width = 8, height = 7)
ggsave("figures/tiff/fig_6.tiff", width = 8, height = 7)
```

# Figure 7: correlations between pairs

```{r}
# compute % of positive evaluations for each item, wave, and partisan group
partisan_perf_agg = mean_and_ci(
  public_perf_long, 
  outcome = "perf_positive", 
  by = "wave+item+partisan", 
  survey_weight = "weight"
)

# correlation across the 30 items, by wave, between experts and Democrats
cors_dems = bind_rows(
  expert_perf_agg,
  partisan_perf_agg %>% filter(partisan == "Democrat") %>% 
    rename(who = partisan) %>% 
    mutate(item = rename_perf(item))
) %>% 
  select(wave, item, mean, who) %>% 
  pivot_wider(id_cols = c(wave, item),
              values_from = mean, 
              names_from = who) %>% 
  group_by(wave) %>% 
  summarise(corr_30perf = cor(Experts, Democrat)) %>% 
  mutate(groups = "Experts and Democrats")

# correlation between experts and Republicans
cors_reps = bind_rows(
  expert_perf_agg,
  partisan_perf_agg %>% filter(partisan == "Republican") %>% 
    rename(who = partisan) %>% 
    mutate(item = rename_perf(item))
) %>% 
  select(wave, item, mean, who) %>% 
  pivot_wider(id_cols = c(wave, item),
              values_from = mean, 
              names_from = who) %>% 
  group_by(wave) %>% 
  summarise(corr_30perf = cor(Experts, Republican)) %>% 
  mutate(groups = "Experts and Republicans")

# correlation between experts and Independents
cors_inds = bind_rows(
  expert_perf_agg,
  partisan_perf_agg %>% filter(partisan == "Independent") %>% 
    rename(who = partisan) %>% 
    mutate(item = rename_perf(item))
) %>% 
  select(wave, item, mean, who) %>% 
  pivot_wider(id_cols = c(wave, item),
              values_from = mean, 
              names_from = who) %>% 
  group_by(wave) %>% 
  summarise(corr_30perf = cor(Experts, Independent)) %>% 
  mutate(groups = "Experts and Independents")

bind_rows(
  cors_dems, 
  cors_reps,
  cors_inds
) %>% 
  # wave 1, 2, and 11: only expert survey
  filter(wave > 2 & wave != 11) %>% 
  date_me_fxn() %>% 
  ggplot(aes(x = date, y = corr_30perf, col = groups)) + 
  geom_point() +
  geom_line() +
  scale_x_date(
    limits = as.Date(c('1/1/2017', '1/1/2024'), format = "%d/%m/%Y"),
    breaks = as.Date(c('1/1/2017', '1/1/2018', '1/1/2019', '1/1/2020', 
                       '1/1/2021', '1/1/2022', '1/1/2023', '1/1/2024'), format = "%d/%m/%Y"),
    labels = c(2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024)
  ) +
  labs(y = "Correlation coefficient across 30 performance items") +
  scale_y_continuous(limits = c(0, 1),
                     breaks = seq(0, 1, 0.2)) +
  scale_color_manual(values = c("#003f5c", "darkgrey", "#ffa600")) +
  theme_custom() %+replace%
  theme(axis.title.x = element_blank())

ggsave("figures/png/fig_7.png", height = 7, width = 9)
ggsave("figures/tiff/fig_7.tiff", height = 7, width = 9)
```

# Data mentioned in the text

```{r}
# number of unique LuthID values
# this approximates the number of unique respondents across all BLW waves
# though note that this is an upper-bound, e.g. some professor who moved 
# institution probably has two separate LuthIDs for their two email addresses
length(unique(expert_combined$LuthID))

# interquartile range of proportion of "Not sure" answers to the 30 performance
# items, among public respondents
public_perf_long %>% 
  mutate(
    perf_ns = ifelse(performance == "Not sure", 1, 0)
  ) %>% 
  mean_and_ci(outcome = "perf_ns", by = "wave+item", survey_weight = "weight") %>% 
  ungroup() %>% 
  summarise(
    quantile(mean)
  )
```



